# 2024 Dev/GlowwormGenes.R

#' @title Generate gene lists from GWAS summary statistics.
#' @param Input GWAS summary statistics containing columns for SNP, chromosome, position and p-values
#' @param Window Flanking window in kB, applied both upstream and downstream of each SNP. Default is 100 kB, which gives a 200 kB window.
#' @param build Genome build passed through to \code{find_nearest_gene}. Default is "hg19".
#' @param snp Name of the column in \code{Input} holding the SNP identifiers. Default is "SNP".
#' @param chr Name of the column in \code{Input} holding the chromosome. Default is "CHR".
#' @param bp Name of the column in \code{Input} holding the base-pair position. Default is "POS".
#' @param Pcol Name of the column in \code{Input} holding the p-values. Default is "P".
#' @param Pcutoff Only SNPs with a p-value strictly below this cutoff are kept. Default is 5e-08.
#' @return This function takes the GWAS summary statistics and generates a list of neighbor genes which can then be used for Glowworm gene prioritization. \cr \cr The output includes a list containing:
#' \itemize{
#'     \item FullOutput - a data frame containing the summary statistics with a new row for each gene expressed in the set window, along with the distance from the SNP
#'     \item UniqueGenesinWindow - a vector of unique genes within the flanking region
#'     \item NearestNeighborGene - a vector of unique genes located closest to the SNP}
#' If no SNP passes \code{Pcutoff}, a warning is raised and the list holds the
#' (empty) filtered input together with two empty gene vectors.
#' @export
GlowwormGenes <- function(Input, Window = 100, build = "hg19", snp = "SNP", chr = "CHR", bp = "POS", Pcol = "P", Pcutoff = 5e-08) {
  # Fail early with a readable message instead of silently filtering on a
  # NULL column (Input[[Pcol]] is NULL when the column does not exist).
  absent <- setdiff(c(snp, chr, bp, Pcol), names(Input))
  if (length(absent) > 0) {
    stop(
      "Input is missing required column(s): ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # Keep SNPs below the cutoff; which() drops rows whose p-value is NA.
  p_values <- Input[[Pcol]]
  Input <- Input[which(p_values < Pcutoff), , drop = FALSE]

  if (nrow(Input) == 0) {
    warning(
      "No SNPs passed the P-value cutoff; returning empty gene lists.",
      call. = FALSE
    )
    return(list(
      FullOutput = Input,
      UniqueGenesinWindow = character(0),
      NearestNeighborGene = character(0)
    ))
  }

  Pull <- find_nearest_gene(
    Input,
    flanking = Window, build = build, collapse = FALSE,
    snp = snp, chr = chr, bp = bp
  )

  # The rsid, GENE and distance columns are read from the find_nearest_gene
  # output; rows without a gene are treated as unmapped SNPs.
  genes <- Pull[["GENE"]]
  has_gene <- !is.na(genes)
  genes_in_window <- unique(genes[has_gene])

  # Absolute distance: strip the sign and treat "intergenic" as distance 0.
  # FullOutput keeps the distance column exactly as find_nearest_gene gave it.
  abs_dist <- as.numeric(
    gsub("intergenic", "0", gsub("-", "", Pull[["distance"]]))
  )

  # For every SNP keep the gene(s) at the minimum distance (ties are kept).
  usable <- has_gene & !is.na(abs_dist)
  nearest_genes <- genes[0]
  if (any(usable)) {
    d <- abs_dist[usable]
    closest <- ave(d, as.character(Pull[["rsid"]][usable]), FUN = min)
    nearest_genes <- unique(genes[usable][d == closest])
  }

  OutsList <- list(
    FullOutput = Pull,
    UniqueGenesinWindow = genes_in_window,
    NearestNeighborGene = nearest_genes
  )

  n_mapped <- length(unique(Pull[["rsid"]][has_gene]))
  message(
    "Summary of output:\n From summary statistics containing ",
    length(unique(Input[[snp]])), " unique SNPs, ",
    n_mapped, " were mapped to genes.\n A total of ",
    length(genes_in_window), " genes were found within the ",
    as.numeric(Window) * 2, " kB window.\n ",
    length(nearest_genes), " unique nearest neighbor genes were found."
  )

  OutsList
}
# Hannahglover/Glowworm documentation built on Jan. 16, 2024, 11:47 p.m.